from PIL import Image
from statistics import *
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cse351_hw1_zhang_junhui_112895310
# Load the raw NYC Airbnb listings. The path is relative, so the CSV must sit
# in the current working directory when this runs.
airbnb_data = pd.read_csv("AB_NYC_2019.csv")
# Question 1
# Pass the raw listings through the homework module's removeAnomaly helper and
# keep the result for every later question.
# NOTE(review): the filtering rules themselves live in the module — confirm there.
removedAnomaly = cse351_hw1_zhang_junhui_112895310.removeAnomaly(airbnb_data)
# Bare expression: presumably the last line of a notebook cell, which echoes
# the resulting frame as the cell output.
removedAnomaly
| | id | name | host_id | host_name | neighbourhood_group | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2539 | Clean & quiet apt home by the park | 2787 | John | Brooklyn | Kensington | 40.64749 | -73.97237 | Private room | 149 | 1 | 9 | 10/19/2018 | 0.21 | 6 | 365 |
| 1 | 2595 | Skylit Midtown Castle | 2845 | Jennifer | Manhattan | Midtown | 40.75362 | -73.98377 | Entire home/apt | 225 | 1 | 45 | 5/21/2019 | 0.38 | 2 | 355 |
| 2 | 3647 | THE VILLAGE OF HARLEM....NEW YORK ! | 4632 | Elisabeth | Manhattan | Harlem | 40.80902 | -73.94190 | Private room | 150 | 3 | 0 | NaN | NaN | 1 | 365 |
| 3 | 3831 | Cozy Entire Floor of Brownstone | 4869 | LisaRoxanne | Brooklyn | Clinton Hill | 40.68514 | -73.95976 | Entire home/apt | 89 | 1 | 270 | 7/5/2019 | 4.64 | 1 | 194 |
| 5 | 5099 | Large Cozy 1 BR Apartment In Midtown East | 7322 | Chris | Manhattan | Murray Hill | 40.74767 | -73.97500 | Entire home/apt | 200 | 3 | 74 | 6/22/2019 | 0.59 | 1 | 129 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 48890 | 36484665 | Charming one bedroom - newly renovated rowhouse | 8232441 | Sabrina | Brooklyn | Bedford-Stuyvesant | 40.67853 | -73.94995 | Private room | 70 | 2 | 0 | NaN | NaN | 2 | 9 |
| 48891 | 36485057 | Affordable room in Bushwick/East Williamsburg | 6570630 | Marisol | Brooklyn | Bushwick | 40.70184 | -73.93317 | Private room | 40 | 4 | 0 | NaN | NaN | 2 | 36 |
| 48892 | 36485431 | Sunny Studio at Historical Neighborhood | 23492952 | Ilgar & Aysel | Manhattan | Harlem | 40.81475 | -73.94867 | Entire home/apt | 115 | 10 | 0 | NaN | NaN | 1 | 27 |
| 48893 | 36485609 | 43rd St. Time Square-cozy single bed | 30985759 | Taz | Manhattan | Hell's Kitchen | 40.75751 | -73.99112 | Shared room | 55 | 1 | 0 | NaN | NaN | 6 | 2 |
| 48894 | 36487245 | Trendy duplex in the very heart of Hell's Kitchen | 68119814 | Christophe | Manhattan | Hell's Kitchen | 40.76404 | -73.98933 | Private room | 90 | 7 | 0 | NaN | NaN | 1 | 23 |
31103 rows × 16 columns
# Question 2A
# Build the price ranking from the cleaned listings. The value echoed below is
# a dict with 'Top5' and 'Bot5' keys, each holding five neighbourhood names.
# NOTE(review): presumably ranked by average price per neighbourhood — confirm
# against getTop5Bot5Price in the homework module.
top5bot5List = cse351_hw1_zhang_junhui_112895310.getTop5Bot5Price(removedAnomaly)
# Bare expression: presumably the last line of a notebook cell, so the dict is
# shown as the cell output.
top5bot5List
{'Top5': ['Riverdale', 'Tribeca', 'Flatiron District', 'NoHo', 'SoHo'],
'Bot5': ['Schuylerville',
'Hunts Point',
'New Brighton',
'Soundview',
'Far Rockaway']}
# Question 2B
# Hand both the Top5/Bot5 dict from Question 2A and the cleaned listings to the
# module's plotting helper.
# NOTE(review): presumably plots prices for those ten neighbourhoods — confirm
# the chart type in plotTop5Bot5.
cse351_hw1_zhang_junhui_112895310.plotTop5Bot5(top5bot5List, removedAnomaly)
# Question 3
# Run the module's correlation heat-map helper on the cleaned listings.
# NOTE(review): which columns are correlated is decided inside
# createCorrelationHeatMap — verify there.
cse351_hw1_zhang_junhui_112895310.createCorrelationHeatMap(removedAnomaly)
# Question 4A
# Run the module's per-borough plot on the cleaned listings.
# NOTE(review): presumably groups by the neighbourhood_group column — confirm
# in plotByBorough.
cse351_hw1_zhang_junhui_112895310.plotByBorough(removedAnomaly)
# Question 4B
# Run the module's price plot on the cleaned listings. The same call is
# repeated later under Questions 6 and 7.
# NOTE(review): axes and grouping are defined in generatePlotByPrice — confirm there.
cse351_hw1_zhang_junhui_112895310.generatePlotByPrice(removedAnomaly)
# Question 5
# Run the module's word-cloud helper on the cleaned listings.
# NOTE(review): presumably built from the listing `name` text — confirm in
# generateWordCloud.
cse351_hw1_zhang_junhui_112895310.generateWordCloud(removedAnomaly)
# Question 6
# Produce four plots from the cleaned listings, in this order: listings,
# availability, price, reviews.
# NOTE(review): generatePlotByPrice was already called with the same argument
# under Question 4B — confirm the repeat is intentional.
cse351_hw1_zhang_junhui_112895310.generatePlotByListings(removedAnomaly)
cse351_hw1_zhang_junhui_112895310.generatePlotByAvailability(removedAnomaly)
cse351_hw1_zhang_junhui_112895310.generatePlotByPrice(removedAnomaly)
cse351_hw1_zhang_junhui_112895310.generatePlotByReviews(removedAnomaly)
# Question 7
# NOTE(review): this is the only call that receives the raw `airbnb_data`;
# every other call in this file receives `removedAnomaly`. Confirm that plotting
# room types on the uncleaned data is intended.
cse351_hw1_zhang_junhui_112895310.generatePlotByRoomType(airbnb_data)
# Same price plot as in Questions 4B and 6, again on the cleaned listings.
cse351_hw1_zhang_junhui_112895310.generatePlotByPrice(removedAnomaly)
# Explanations are in the .py file